##################################################################

# Gautam Nair
# gautam.nair@yale.edu
# Misperceptions of Relative Affluence and Support for International Transfers
# Make Table 8, Table 9, and Figure 6: Correlation between Reference Group Income and Global Median Income Estimate
# Table 8: Excludes 7 outlier responses over $100,000
# Table 9: Includes 7 outlier responses over $100,000
# There might be slight differences with the published version because a small amount of noise has been added to zip code medians 

##################################################################

# setwd("")

##################################################################
# loading packages
##################################################################

library(Hmisc)
library(foreign)
library(sandwich)
library(lmtest)
library(numDeriv)
library(stargazer)
library(ggplot2)
library(plyr)
library(gridExtra)
library(ri)
library(dplyr)
library(plyr)
library(scales)

##################################################################

rm(list=ls())

##################################################################
# Correlation Between Reference Group Income and Estimated Global Median
##################################################################

data.working <- readRDS("d_r_cleaned_analysis_dataset.Rda")

ind.group.1 <- c("income.new.midpoint.10000")
ind.group.2 <- c("zip.median.10000")
ind.group.3 <- c("educat.ba.plus")
ind.group.4 <- c("income.new.midpoint.10000", "zip.median.10000", "educat.ba.plus")
list.ind.group <- list(ind.group.1, ind.group.2, ind.group.3, ind.group.4)

dep.var <- c("median.estimate.topcoded", "median.estimate")
reg.models <-vector("list", length(dep.var)) 
se.models <-vector("list", length(dep.var))
matrix <- vector("list", length(dep.var))

x=0 

for(i in 1:length(dep.var)){
	for(y in 1:length(list.ind.group)){
		my.formula <-paste(dep.var[i],'~', paste(list.ind.group[[y]], collapse= ' + '))
		temp.model <- lm(my.formula, data=data.working)
		temp.se <- coeftest(temp.model, vcov=vcovHC(temp.model,type="HC2"))[,2]
		x = x+1
		se.models[[x]] <- temp.se
		reg.models[[x]] <- temp.model
	}
}

varlabels <-       c("Household Income (USD/10,000)", "Median Income in Zipcode (USD/10,000)", "Completed College Dummy" )
specnotes <- c("Robust standard errors in parentheses.")
spectitle <- c("Correlation between Reference Group Income and Estimated Global Median Income (outliers above 100,00 included)")
outputfile <- c("tf_t_08_estimated_median_notopcoding.txt")
depvarcaption <- c("Estimated Global Median Income in USD")

stargazer(reg.models[[1]], reg.models[[2]], reg.models[[3]], reg.models[[4]],
          se=list(se.models[[1]], se.models[[2]], se.models[[3]], se.models[[4]]),
         type="text",
         title= spectitle,
          out=outputfile,
          no.space=TRUE, 
          model.numbers= TRUE,
         notes.append=FALSE,
          align=FALSE,
         covariate.labels=varlabels,
          header= FALSE,
          font.size="small",
          model.names=TRUE,
          digits=2,
          omit.stat = c("rsq", "f","adj.rsq"),
          dep.var.caption  = "",
          dep.var.labels.include=FALSE,
         df=FALSE,
         omit.table.layout="n"
)   

varlabels <-       c("Household Income (USD/10,000)", "Median Income in Zipcode (USD/10,000)", "Completed College Dummy" )
specnotes <- c("Robust standard errors in parentheses.")
spectitle <- c("Correlation between Reference Group Income and Estimated Global Median Income (outliers above 100,00 included)")
outputfile <- c("tf_t_09_estimated_median_notopcoding.txt")
depvarcaption <- c("Estimated Global Median Income in USD")

stargazer(reg.models[[5]], reg.models[[6]], reg.models[[7]], reg.models[[8]],
          se=list(se.models[[5]], se.models[[6]], se.models[[7]], se.models[[8]]),
         type="text",
         title= spectitle,
          out=outputfile,
          no.space=TRUE, 
          model.numbers= TRUE,
         notes.append=FALSE,
          align=FALSE,
         covariate.labels=varlabels,
          header= FALSE,
          font.size="small",
          model.names=TRUE,
          digits=2,
          omit.stat = c("rsq", "f","adj.rsq"),
          dep.var.caption  = "",
          dep.var.labels.include=FALSE,
         df=FALSE,
         omit.table.layout="n"
) 


##################################################################
 
 # Figure 6: Lowess of Median Income vs Own Income by Colllege Degree

data.working$educat.ba.plus <- as.factor(data.working$educat.ba.plus)
data.working <- data.working[data.working$group==2|data.working$group==3,]
median.income <- ggplot(data.working, aes(income.new.midpoint, median.estimate.topcoded, group=educat.ba.plus, linetype=educat.ba.plus))
median.income <- median.income + stat_smooth(level=0.90, fill=c("gray80")) + 
 scale_x_continuous(breaks=c(20000, 40000, 60000, 80000, 100000), limits=c(0, 125000), labels=c("20,000", "40,000","60,000","80,000","100,000")) + 
  xlab("Household Income ($)") +
  ylab("Estimate of Global Median Income ($)")  +
  theme_bw() +
  theme(axis.text=element_text(size=10), axis.title=element_text(size=10), plot.margin=unit(c(0.2,0.2,0.2,0.2), "cm")) +
  aes(colour = factor(educat.ba.plus)) +
  scale_linetype_manual(values = c(rep("dashed", 1), rep("solid", 1)), breaks=c(0, 1), labels=c("No Information", "Information")) +
  scale_colour_manual(values=c("red", "navyblue"), breaks=c(0, 1), labels=c("No Information", "Information")) +
  theme(legend.position="none") 
  #geom_rug(sides="b")
median.income
ggsave(median.income, filename="tf_f_06_median_estimate_hhincome_college.png", width=7,height=4)